#### read in the 30 percent data
require(dplyr)
require(tidyr)
require(stargazer)
require(rsq)

fol_path= "D:\\Research\\global_refor_mac\\tabulated_data\\full_data\\"

# read in datasets
df_afr = readRDS(paste0(fol_path, "afr_50per_full.rds"))
df_asi = readRDS(paste0(fol_path, "asi_50per_full.rds"))
df_lat = readRDS(paste0(fol_path, "lat_50per_full.rds"))

df_plant = readRDS(paste0(fol_path, "plantat_long_agg.rds"))

# create new layers of forest cover
df_afr$per_forest_2000 = df_afr$per_all_forest + df_afr$per_for_loss
df_afr$per_forest_2000_2 = df_afr$per_forest_2000^2
df_afr$per_forest_2000_3 = df_afr$per_forest_2000^3
df_afr$per_forest_2000_4 = df_afr$per_forest_2000^4

# calculate percent 
df_asi$per_forest_2000 = df_asi$per_all_forest + df_asi$per_for_loss
df_asi$per_forest_2000_2 = df_asi$per_forest_2000^2
df_asi$per_forest_2000_3 = df_asi$per_forest_2000^3
df_asi$per_forest_2000_4 = df_asi$per_forest_2000^4

df_lat$per_forest_2000 = df_lat$per_all_forest + df_lat$per_for_loss
df_lat$per_forest_2000_2 = df_lat$per_forest_2000^2
df_lat$per_forest_2000_3 = df_lat$per_forest_2000^3
df_lat$per_forest_2000_4 = df_lat$per_forest_2000^4

# create left join of data
df_lat = left_join(df_lat, df_plant, by = "uni_ID")
df_afr = left_join(df_afr, df_plant, by = "uni_ID")
df_asi = left_join(df_asi, df_plant, by = "uni_ID")

# create percentage
df_lat$per_shc_all = df_lat$per_shc_1 + df_lat$per_shc_2 + df_lat$per_shc_3 + df_lat$per_shc_4 
df_afr$per_shc_all = df_afr$per_shc_1 + df_afr$per_shc_2 + df_afr$per_shc_3 + df_afr$per_shc_4 
df_asi$per_shc_all = df_asi$per_shc_1 + df_asi$per_shc_2 + df_asi$per_shc_3 + df_asi$per_shc_4 

df_eco = readRDS(paste0(fol_path, "eco_region.rds"))
csv_eco_region = read.csv(paste0("D:/Research/global_refor_mac/data/ecoregion_data/",
                                 "ecoregion_biome_translation.csv"), stringsAsFactors = FALSE)
df_eco$biome_num = csv_eco_region$BIOME_NUM[match(df_eco$ecoregions, csv_eco_region$ECO_ID)]

df_lat = left_join(df_lat, df_eco, by = "uni_ID")
df_afr = left_join(df_afr, df_eco, by = "uni_ID")
df_asi = left_join(df_asi, df_eco, by = "uni_ID")

df_lat$cont_var = "LatinAmerica"
df_afr$cont_var = "Africa"
df_asi$cont_var = "Asia"

# create full data
df_full = bind_rows(df_lat, df_afr, df_asi)

df_full$biome_num_f = factor(df_full$biome_num)
df_full$cont_var_f = factor(df_full$cont_var)
df_full$cont_num_f = factor(ifelse(df_full$cont_var == "LatinAmerica", 1,
                            ifelse(df_full$cont_var == "Africa", 2, 3)))
df_full$ecoregions_f = factor(df_full$ecoregions)
df_full$country_f = factor(df_full$country)

# save out the rds files 
saveRDS(df_full, paste0("D:/Research/global_refor_mac/results/rds_reg_files/", "all_cont_reg_50per.rds"))

# run continental regression 
reg_loss_cont = rxGlm(per_for_loss ~ agrev_dec_max_00_10 + per_forest_2000 + per_forest_2000_2 +
                      per_forest_2000_3 + per_forest_2000_4 + dis_city_750k_near + slope +
                      elev + iucnlow_2000 + iucnhigh_2000 + F(biome_num) + F(cont_num_f), 
                    data = df_full[(df_full$per_forest_2000 < 1), ], 
                    family = poisson(link = log), 
                    coefLabelStyle = "R", 
                    maxIterations = 1000)


reg_gain_cont = rxGlm(per_for_gain ~ agrev_dec_max_00_10 + per_forest_2000 + per_forest_2000_2 +
                        per_forest_2000_3 + per_forest_2000_4 + dis_city_750k_near + slope +
                        elev + iucnlow_2000 + iucnhigh_2000  + F(biome_num) + F(cont_num_f), 
                      data = df_full[df_full$per_forest_2000  > 0, ], 
                      family = poisson(link = log), 
                      coefLabelStyle = "R", 
                      maxIterations = 1000)


# save out the rds files 
saveRDS(reg_loss_cont, paste0("D:/Research/global_refor_mac/results/rds_reg_files/", "reg_loss_cont_50per.rds"))
saveRDS(reg_gain_cont, paste0("D:/Research/global_refor_mac/results/rds_reg_files/", "reg_gain_cont_50per.rds"))

